---
title: "African American Achievements"
author: "Anirudh Jayaraman"
date: "11/06/2020"
output: html
---
Loading possibly relevant libraries (that we can keep adding to)
library(tidyverse)
## -- Attaching packages --------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 1.0.0
## v tidyr 1.0.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Make ggplot2's light theme the default for every plot drawn below.
theme_set(theme_light())
Getting the data!
# Data source switch: 0 reads the raw CSVs from GitHub, any other value goes
# through the tidytuesdayR package.
choose_how <- 1 # Set this to either 0 or 1
if (choose_how != 0) {
  # tidytuesdayR route (https://github.com/thebioengineer/tidytuesdayR).
  # The release can be requested by date (0) or by year + week number (1).
  choose_again <- 1 # Set this to either 0 or 1
  tuesdata <- if (choose_again == 0) {
    tidytuesdayR::tt_load('2020-06-09')
  } else {
    tidytuesdayR::tt_load(2020, week = 24)
  }
  firsts <- tuesdata$firsts
  science <- tuesdata$science
} else {
  # Direct route: read both CSV files from their GitHub URLs.
  firsts_url <- "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-09/firsts.csv"
  science_url <- "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-06-09/science.csv"
  firsts <- readr::read_csv(firsts_url)
  science <- readr::read_csv(science_url)
}
## --- Compiling #TidyTuesday Information for 2020-06-09 ----
## --- There are 2 files available ---
## --- Starting Download ---
##
## Downloading file 1 of 2: `firsts.csv`
## Downloading file 2 of 2: `science.csv`
## --- Download complete ---
# Open the spreadsheet-style viewers only in an interactive session.
# View() launches a data viewer, which may be unavailable (and is never
# useful) when this document is rendered non-interactively.
if (interactive()) {
  View(firsts)
  View(science)
}
# Stacked histogram of the year of each "first", coloured by category.
ggplot(firsts, aes(x = year, fill = category)) +
  geom_histogram(bins = 50)
# Same histogram as above, but with one panel per category.
ggplot(firsts, aes(x = year, fill = category)) +
  geom_histogram(bins = 50) +
  facet_wrap(vars(category))
# Number of entries per category, largest first.
count(firsts, category, sort = TRUE)
## # A tibble: 8 x 2
## category n
## <chr> <int>
## 1 Arts & Entertainment 107
## 2 Education & Science 87
## 3 Politics 82
## 4 Military 73
## 5 Social & Jobs 57
## 6 Sports 38
## 7 Religion 21
## 8 Law 14
# Horizontal bar chart of entries per category; the category factor is
# reordered by its count so the bars appear in sorted order.
count(firsts, category, sort = TRUE) %>%
  mutate(category = fct_reorder(category, n)) %>%
  ggplot(aes(n, category)) +
  geom_col()
# Look at the raw person column on its own.
select(firsts, person)
## # A tibble: 479 x 1
## person
## <chr>
## 1 Gracia Real de Santa Teresa de Mose (later named Fort Mose) in Florida
## 2 Jupiter Hammon (poem An Evening Thought
## 3 Wentworth Cheswell, town constable in Newmarket, New Hampshire.[5]
## 4 Phillis Wheatley (Poems on Various Subjects, Religious and Moral)[6]
## 5 Silver Bluff Baptist Church, Aiken County, South Carolina[7][8][Note 1]
## 6 Prince Hall
## 7 the 1st Rhode Island Regiment[9]
## 8 James Derham, who did not hold an M.D. degree.[10] (See also 1847 firsts.)
## 9 Rev. Lemuel Haynes. He was ordained in the Congregational Church, which beca~
## 10 3,000 Black Loyalist slaves, who had escaped to British lines during the Am~
## # ... with 469 more rows
Removing any text from the person column following [ or (
# Preview the cleaned column: drop everything from the first "(" or "[" to the
# end of the string, then strip surrounding whitespace.
transmute(
  firsts,
  person = str_trim(str_remove(person, pattern = "[\\(\\[].*"))
)
## # A tibble: 479 x 1
## person
## <chr>
## 1 Gracia Real de Santa Teresa de Mose
## 2 Jupiter Hammon
## 3 Wentworth Cheswell, town constable in Newmarket, New Hampshire.
## 4 Phillis Wheatley
## 5 Silver Bluff Baptist Church, Aiken County, South Carolina
## 6 Prince Hall
## 7 the 1st Rhode Island Regiment
## 8 James Derham, who did not hold an M.D. degree.
## 9 Rev. Lemuel Haynes. He was ordained in the Congregational Church, which beca~
## 10 3,000 Black Loyalist slaves, who had escaped to British lines during the Am~
## # ... with 469 more rows
Changing the Firsts dataset to reflect cleaned Person column
# Persist the cleaning: remove any trailing "(...)" / "[...]" text from person
# and trim the surrounding whitespace.
firsts <- mutate(
  firsts,
  person = str_trim(str_remove(person, "[\\(\\[].*"))
)
# `tuesdata` is only created when the data is loaded through tidytuesdayR;
# when the CSVs are read directly from their URLs it does not exist, so check
# before showing it. The value of the `if` is auto-printed at top level.
if (exists("tuesdata")) {
  tuesdata
}
## Available datasets:
## firsts
## science
##
Interactive Graphics
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(glue)
##
## Attaching package: 'glue'
## The following object is masked from 'package:dplyr':
##
## collapse
Attempting a timeline via plotly
# Build the static timeline first: one point per "first", placed by year on
# the x-axis and by category on the y-axis. The `text` aesthetic carries the
# label that is shown as the tooltip in the interactive version.
g <- ggplot(
  data = firsts,
  mapping = aes(
    x = year,
    y = category,
    color = category,
    text = glue("{year}, {accomplishment},\n{person}")
  )
) +
  geom_point() +
  labs(
    title = "Timeline of some notable African-American achievements",
    caption = "Source: https://en.wikipedia.org/wiki/List_of_African-American_firsts"
  ) +
  # Hide the y-axis text, ticks and horizontal grid lines.
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    panel.grid.major.y = element_blank()
  )

# Convert to an interactive plotly widget whose tooltip shows only `text`.
ggplotly(g, tooltip = "text")
# Print the science tibble to preview its columns.
science
## # A tibble: 120 x 7
## name birth death occupation_s inventions_accompli~ references links
## <chr> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 Amos, ~ 1918 2003 Microbiologist First African-Ameri~ 6, https://~
## 2 Alcorn~ 1940 NA Physicist; inv~ Invented a method o~ 7,8, https://~
## 3 Andrew~ 1930 1998 Mathematician Put forth the Andre~ 9, https://~
## 4 Alexan~ 1888 1958 Civil engineer Responsible for the~ <NA> https://~
## 5 Bailey~ 1825 1918 Inventor Folding bed 10, https://~
## 6 Ball, ~ 1892 1916 Chemist Extracted chaulmoog~ 11, https://~
## 7 Bannek~ 1731 1806 Almanac author~ Constructed wooden ~ 12, https://~
## 8 Banyag~ 1947 NA Mathematician Work on diffeomorph~ 13, https://~
## 9 Bashen~ 1957 NA Inventor; entr~ First African-Ameri~ 14, https://~
## 10 Bath, ~ 1942 2019 Ophthalmologist First African-Ameri~ 15,16,17, https://~
## # ... with 110 more rows
# Ten most frequent values of the raw occupation_s column.
head(count(science, occupation_s, sort = TRUE), n = 10)
## # A tibble: 10 x 2
## occupation_s n
## <chr> <int>
## 1 Inventor 20
## 2 Chemist 8
## 3 Computer scientist 6
## 4 Mathematician 5
## 5 Physicist 4
## 6 Computer engineer 2
## 7 Engineer; inventor 2
## 8 Linguist 2
## 9 Mathematician; statistician 2
## 10 Psychologist 2
Some occupations are lists separated by semicolons. The data currently has 120 rows and 7 columns. Let’s expand the dataset so that each row holds a single occupation.
# Put each semicolon-separated occupation on its own row, title-case it and
# count the occurrences, most frequent first.
# NOTE(review): splitting on ";" alone keeps the space that follows each
# semicolon, so e.g. " Inventor" and "Inventor" are counted separately.
science %>%
  separate_rows(occupation_s, sep = ";") %>%
  count(occupation = str_to_title(occupation_s), sort = TRUE)
## # A tibble: 89 x 2
## occupation n
## <chr> <int>
## 1 "Inventor" 25
## 2 "Chemist" 9
## 3 " Inventor" 8
## 4 "Mathematician" 8
## 5 "Computer Scientist" 6
## 6 "Physicist" 6
## 7 " Social Scientist" 3
## 8 " Statistician" 3
## 9 "Engineer" 3
## 10 "Psychologist" 3
## # ... with 79 more rows
# Show the expanded data: one row per (person, occupation) pair.
separate_rows(science, occupation_s, sep = ";")
## # A tibble: 164 x 7
## name birth death occupation_s inventions_accomplis~ references links
## <chr> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 Amos, ~ 1918 2003 "Microbiolog~ First African-Americ~ 6, https://e~
## 2 Alcorn~ 1940 NA "Physicist" Invented a method of~ 7,8, https://e~
## 3 Alcorn~ 1940 NA " inventor" Invented a method of~ 7,8, https://e~
## 4 Andrew~ 1930 1998 "Mathematici~ Put forth the Andrew~ 9, https://e~
## 5 Alexan~ 1888 1958 "Civil engin~ Responsible for the ~ <NA> https://e~
## 6 Bailey~ 1825 1918 "Inventor" Folding bed 10, https://e~
## 7 Ball, ~ 1892 1916 "Chemist" Extracted chaulmoogr~ 11, https://e~
## 8 Bannek~ 1731 1806 "Almanac aut~ Constructed wooden c~ 12, https://e~
## 9 Bannek~ 1731 1806 " surveyor" Constructed wooden c~ 12, https://e~
## 10 Bannek~ 1731 1806 " farmer" Constructed wooden c~ 12, https://e~
## # ... with 154 more rows
Filter the dataset for occupations containing “scientist” (case-insensitive) or “ian” (e.g. mathematician, statistician, physician)
# Keep the rows whose single occupation mentions "scientist" (any case) or
# contains "ian".
science %>%
  separate_rows(occupation_s, sep = ";") %>%
  filter(
    str_detect(occupation_s, regex("scientist", ignore_case = TRUE)) |
      str_detect(occupation_s, regex(".*ian"))
  )
## # A tibble: 29 x 7
## name birth death occupation_s inventions_accomplis~ references links
## <chr> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 Andrew~ 1930 1998 "Mathematicia~ Put forth the Andrew~ 9, https://~
## 2 Banyag~ 1947 NA "Mathematicia~ Work on diffeomorphi~ 13, https://~
## 3 Bharuc~ 1927 1985 "Mathematicia~ Probability theory a~ 24, https://~
## 4 Bharuc~ 1927 1985 " statisticia~ Probability theory a~ 24, https://~
## 5 Blackw~ 1919 2010 "Mathematicia~ First proposed the B~ 27, https://~
## 6 Blackw~ 1919 2010 " statisticia~ First proposed the B~ 27, https://~
## 7 Bowman~ 1923 2011 "Physician" Pathologist and gene~ 35,36, https://~
## 8 Chappe~ 1872 1941 "Electrician" Designed long-distan~ 54,55,56, https://~
## 9 Chappe~ 1925 2019 "Scientist an~ Valuable contributio~ <NA> https://~
## 10 Dean, ~ 1957 NA "Computer sci~ Led the team that de~ 58,59,60, https://~
## # ... with 19 more rows
# Extract the links column as a plain character vector.
pull(science, links)
## [1] "https://en.wikipedia.org/wiki/Harold_Amos"
## [2] "https://en.wikipedia.org/wiki/George_Edward_Alcorn,_Jr."
## [3] "https://en.wikipedia.org/wiki/James_J._Andrews_(mathematician)"
## [4] "https://en.wikipedia.org/wiki/Archie_Alexander"
## [5] "https://en.wikipedia.org/wiki/Leonard_C._Bailey"
## [6] "https://en.wikipedia.org/wiki/Alice_Augusta_Ball"
## [7] "https://en.wikipedia.org/wiki/Benjamin_Banneker"
## [8] "https://en.wikipedia.org/wiki/Augustin_Banyaga"
## [9] "https://en.wikipedia.org/wiki/Janet_Emerson_Bashen"
## [10] "https://en.wikipedia.org/wiki/Patricia_Bath"
## [11] "https://en.wikipedia.org/wiki/Andrew_Jackson_Beard"
## [12] "https://en.wikipedia.org/wiki/Miriam_Benjamin"
## [13] "https://en.wikipedia.org/wiki/Leonidas_Berry"
## [14] "https://en.wikipedia.org/wiki/Albert_Turner_Bharucha-Reid"
## [15] "https://en.wikipedia.org/wiki/Keith_Black_(surgeon)"
## [16] "https://en.wikipedia.org/wiki/David_Blackwell"
## [17] "https://en.wikipedia.org/wiki/Henry_Blair_(inventor)"
## [18] "https://en.wikipedia.org/wiki/Kwabena_Boahen"
## [19] "https://en.wikipedia.org/wiki/Sarah_Boone"
## [20] "https://en.wikipedia.org/wiki/Edward_Bouchet"
## [21] "https://en.wikipedia.org/wiki/James_E._Bowman"
## [22] "https://en.wikipedia.org/wiki/Otis_Boykin"
## [23] "https://en.wikipedia.org/wiki/St._Elmo_Brady"
## [24] "https://en.wikipedia.org/wiki/Herman_Branson"
## [25] "https://en.wikipedia.org/w/index.php?title=Oscar_E.Brown&action=edit&redlink=1"
## [26] "https://en.wikipedia.org/wiki/Marie_Van_Brittan_Brown"
## [27] "https://en.wikipedia.org/wiki/William_Warrick_Cardozo"
## [28] "https://en.wikipedia.org/wiki/Ben_Carson"
## [29] "https://en.wikipedia.org/wiki/George_Robert_Carruthers"
## [30] "https://en.wikipedia.org/wiki/George_Washington_Carver"
## [31] "https://en.wikipedia.org/wiki/Charles_W._Chappelle"
## [32] "https://en.wikipedia.org/wiki/Emmett_Chappelle"
## [33] "https://en.wikipedia.org/wiki/Kenneth_and_Mamie_Clark"
## [34] "https://en.wikipedia.org/wiki/Kenneth_and_Mamie_Clark"
## [35] "https://en.wikipedia.org/wiki/David_Crosthwait"
## [36] "https://en.wikipedia.org/w/index.php?title=Nick_Curtis&action=edit&redlink=1"
## [37] "https://en.wikipedia.org/wiki/John_Dabiri"
## [38] "https://en.wikipedia.org/wiki/Marie_Maynard_Daly"
## [39] "https://en.wikipedia.org/wiki/Mark_Dean_(computer_scientist)"
## [40] "https://en.wikipedia.org/wiki/Charles_R._Drew"
## [41] "https://en.wikipedia.org/wiki/Paul_Du_Chaillu"
## [42] "https://en.wikipedia.org/wiki/Annie_Easley"
## [43] "https://en.wikipedia.org/wiki/Clarence_Ellis_(computer_scientist)"
## [44] "https://en.wikipedia.org/wiki/Bisi_Ezerioha"
## [45] "https://en.wikipedia.org/wiki/Lloyd_Noel_Ferguson"
## [46] "https://en.wikipedia.org/wiki/Roland_G._Fryer,_Jr."
## [47] "https://en.wikipedia.org/wiki/Sylvester_James_Gates"
## [48] "https://en.wikipedia.org/wiki/Sarah_E._Goode"
## [49] "https://en.wikipedia.org/wiki/Juan_E._Gilbert"
## [50] "https://en.wikipedia.org/wiki/George_Franklin_Grant"
## [51] "https://en.wikipedia.org/wiki/Joseph_L._Graves"
## [52] "https://en.wikipedia.org/wiki/Lisa_Green_(linguist)"
## [53] "https://en.wikipedia.org/wiki/Kevin_Greenaugh"
## [54] "https://en.wikipedia.org/wiki/Bessie_Blount_Griffin"
## [55] "https://en.wikipedia.org/wiki/Lloyd_Hall"
## [56] "https://en.wikipedia.org/wiki/James_Andrew_Harris"
## [57] "https://en.wikipedia.org/wiki/Walter_Lincoln_Hawkins"
## [58] "https://en.wikipedia.org/wiki/John_E._Hodge"
## [59] "https://en.wikipedia.org/wiki/Kerrie_Holley"
## [60] "https://en.wikipedia.org/wiki/Mary_Jackson_(engineer)"
## [61] "https://en.wikipedia.org/wiki/Shirley_Ann_Jackson"
## [62] "https://en.wikipedia.org/wiki/Erich_Jarvis"
## [63] "https://en.wikipedia.org/wiki/Thomas_L._Jennings"
## [64] "https://en.wikipedia.org/wiki/Lonnie_Johnson_(inventor)"
## [65] "https://en.wikipedia.org/wiki/Katherine_Johnson"
## [66] "https://en.wikipedia.org/wiki/Frederick_McKinley_Jones"
## [67] "https://en.wikipedia.org/wiki/Percy_Lavon_Julian"
## [68] "https://en.wikipedia.org/wiki/Ernest_Everett_Just"
## [69] "https://en.wikipedia.org/wiki/Rick_Kittles"
## [70] "https://en.wikipedia.org/wiki/Samuel_L._Kountz"
## [71] "https://en.wikipedia.org/wiki/Lewis_Howard_Latimer"
## [72] "https://en.wikipedia.org/wiki/Jerry_Lawson_(engineer)"
## [73] "https://en.wikipedia.org/wiki/Raphael_Carl_Lee"
## [74] "https://en.wikipedia.org/wiki/Beebe_Steven_Lynk"
## [75] "https://en.wikipedia.org/wiki/Mary_Eliza_Mahoney"
## [76] "https://en.wikipedia.org/wiki/Jan_Ernst_Matzeliger"
## [77] "https://en.wikipedia.org/wiki/Henry_Cecil_McBay"
## [78] "https://en.wikipedia.org/wiki/Elijah_McCoy"
## [79] "https://en.wikipedia.org/wiki/James_McLurkin"
## [80] "https://en.wikipedia.org/wiki/John_McWhorter"
## [81] "https://en.wikipedia.org/wiki/Ben_Montgomery"
## [82] "https://en.wikipedia.org/wiki/Willie_Hobbs_Moore"
## [83] "https://en.wikipedia.org/wiki/Garrett_Morgan"
## [84] "https://en.wikipedia.org/wiki/Thomas_Mensah_(engineer)"
## [85] "https://en.wikipedia.org/wiki/Alexander_Miles"
## [86] "https://en.wikipedia.org/wiki/Jerome_Nriagu"
## [87] "https://en.wikipedia.org/wiki/John_Ogbu"
## [88] "https://en.wikipedia.org/wiki/Kunle_Olukotun"
## [89] "https://en.wikipedia.org/wiki/Soni_Oyekan"
## [90] "https://en.wikipedia.org/wiki/Alice_H._Parker"
## [91] "https://en.wikipedia.org/wiki/Hildrus_Poindexter"
## [92] "https://en.wikipedia.org/wiki/Arlie_Petters"
## [93] "https://en.wikipedia.org/wiki/Lloyd_Quarterman"
## [94] "https://en.wikipedia.org/wiki/Earl_W._Renfroe"
## [95] "https://en.wikipedia.org/wiki/Norbert_Rillieux"
## [96] "https://en.wikipedia.org/wiki/Larry_Robinson_(chemist)"
## [97] "https://en.wikipedia.org/wiki/Archia_Ross"
## [98] "https://en.wikipedia.org/wiki/Jesse_Russell"
## [99] "https://en.wikipedia.org/wiki/Thomas_Sowell"
## [100] "https://en.wikipedia.org/wiki/Claude_Steele"
## [101] "https://en.wikipedia.org/wiki/Lee_Stiff"
## [102] "https://en.wikipedia.org/wiki/Window_Snyder"
## [103] "https://en.wikipedia.org/wiki/Lewis_Temple"
## [104] "https://en.wikipedia.org/wiki/Vivien_Thomas"
## [105] "https://en.wikipedia.org/wiki/Charles_Henry_Turner_(zoologist)"
## [106] "https://en.wikipedia.org/wiki/Neil_deGrasse_Tyson"
## [107] "https://en.wikipedia.org/wiki/Dorothy_Vaughan"
## [108] "https://en.wikipedia.org/wiki/Powtawche_Valerino"
## [109] "https://en.wikipedia.org/wiki/Arthur_B._C._Walker,_Jr."
## [110] "https://en.wikipedia.org/wiki/Madam_C._J._Walker"
## [111] "https://en.wikipedia.org/wiki/Warren_M._Washington"
## [112] "https://en.wikipedia.org/wiki/James_Edward_Maceo_West"
## [113] "https://en.wikipedia.org/wiki/J._Ernest_Wilkins,_Jr."
## [114] "https://en.wikipedia.org/wiki/Daniel_Hale_Williams"
## [115] "https://en.wikipedia.org/wiki/Scott_W._Williams"
## [116] "https://en.wikipedia.org/wiki/Walter_E._Williams"
## [117] "https://en.wikipedia.org/wiki/Granville_Woods"
## [118] "https://en.wikipedia.org/wiki/Jane_C._Wright"
## [119] "https://en.wikipedia.org/wiki/Louis_T._Wright"
## [120] "https://en.wikipedia.org/wiki/Roger_Arliner_Young"
library(rvest)
## Loading required package: xml2
##
## Attaching package: 'rvest'
## The following object is masked from 'package:purrr':
##
## pluck
## The following object is masked from 'package:readr':
##
## guess_encoding
# Download one example page, pick the node with CSS class "vcard" and show its
# raw HTML as a character string.
as.character(
  html_node(
    read_html("https://en.wikipedia.org/wiki/David_Blackwell"),
    ".vcard"
  )
)
## [1] "<table class=\"infobox biography vcard\" style=\"width:22em\"><tbody>\n<tr><th colspan=\"2\" style=\"text-align:center;font-size:125%;font-weight:bold\"><div class=\"fn\" style=\"display:inline\">David Blackwell</div></th></tr>\n<tr><td colspan=\"2\" style=\"text-align:center\">\n<a href=\"/wiki/File:David_Blackwell_1999.jpeg\" class=\"image\"><img alt=\"David Blackwell 1999.jpeg\" src=\"//upload.wikimedia.org/wikipedia/commons/thumb/7/73/David_Blackwell_1999.jpeg/220px-David_Blackwell_1999.jpeg\" decoding=\"async\" width=\"220\" height=\"152\" srcset=\"//upload.wikimedia.org/wikipedia/commons/thumb/7/73/David_Blackwell_1999.jpeg/330px-David_Blackwell_1999.jpeg 1.5x, //upload.wikimedia.org/wikipedia/commons/7/73/David_Blackwell_1999.jpeg 2x\" data-file-width=\"400\" data-file-height=\"277\"></a><div>Blackwell in 1999</div>\n</td></tr>\n<tr>\n<th scope=\"row\">Born</th>\n<td>\n<div style=\"display:inline\" class=\"nickname\">David Harold Blackwell</div>\n<br><span style=\"display:none\">(<span class=\"bday\">1919-04-24</span>)</span>April 24, 1919<br><div style=\"display:inline\" class=\"birthplace\">\n<a href=\"/wiki/Centralia,_Illinois\" title=\"Centralia, Illinois\">Centralia, Illinois</a>, U.S.</div>\n</td>\n</tr>\n<tr>\n<th scope=\"row\">Died</th>\n<td>July 8, 2010<span style=\"display:none\">(2010-07-08)</span> (aged 91)<sup id=\"cite_ref-stl-post_1-0\" class=\"reference\"><a href=\"#cite_note-stl-post-1\">[1]</a></sup><br><div style=\"display:inline\" class=\"deathplace\">\n<a href=\"/wiki/Berkeley,_California\" title=\"Berkeley, California\">Berkeley, California</a>, U.S.</div>\n</td>\n</tr>\n<tr>\n<th scope=\"row\">Nationality</th>\n<td class=\"category\">American</td>\n</tr>\n<tr>\n<th scope=\"row\">Alma mater</th>\n<td>\n<a href=\"/wiki/University_of_Illinois_at_Urbana-Champaign\" class=\"mw-redirect\" title=\"University of Illinois at Urbana-Champaign\">University of Illinois at Urbana-Champaign</a> (BA, PhD)</td>\n</tr>\n<tr>\n<th 
scope=\"row\">Known for</th>\n<td>\n<a href=\"/wiki/Rao%E2%80%93Blackwell_theorem\" title=\"Rao–Blackwell theorem\">Rao–Blackwell theorem</a><br><a href=\"/wiki/Blackwell_channel\" title=\"Blackwell channel\">Blackwell channel</a><br><a href=\"/wiki/Arbitrarily_varying_channel\" title=\"Arbitrarily varying channel\">Arbitrarily varying channel</a><br><a href=\"/wiki/Determinacy\" title=\"Determinacy\">Games of imperfect information</a><br><a href=\"/wiki/Dirichlet_distribution\" title=\"Dirichlet distribution\">Dirichlet distribution</a><br><a href=\"/wiki/Bayesian_statistics\" title=\"Bayesian statistics\">Bayesian statistics</a><br><a href=\"/wiki/Mathematical_economics\" title=\"Mathematical economics\">Mathematical economics</a><br><a href=\"/wiki/Recursive_economics\" title=\"Recursive economics\">Recursive economics</a><br><a href=\"/wiki/Sequential_analysis\" title=\"Sequential analysis\">Sequential analysis</a>\n</td>\n</tr>\n<tr>\n<th scope=\"row\">Awards</th>\n<td>\n<a href=\"/wiki/Member_of_the_National_Academy_of_Sciences\" title=\"Member of the National Academy of Sciences\">Member of the National Academy of Sciences</a> (1965)<br><a href=\"/wiki/John_von_Neumann_Theory_Prize\" title=\"John von Neumann Theory Prize\">John von Neumann Theory Prize</a> (1979)<br><a href=\"/wiki/R._A._Fisher_Lectureship\" title=\"R. A. Fisher Lectureship\">R. A. 
Fisher Lectureship</a> (1986)</td>\n</tr>\n<tr><td colspan=\"2\" style=\"text-align:center\"><b>Scientific career</b></td></tr>\n<tr>\n<th scope=\"row\">Fields</th>\n<td class=\"category\">\n<a href=\"/wiki/Probability\" title=\"Probability\">Probability</a><br><a href=\"/wiki/Statistics\" title=\"Statistics\">Statistics</a><br><a href=\"/wiki/Logic\" title=\"Logic\">Logic</a><br><a href=\"/wiki/Game_theory\" title=\"Game theory\">Game theory</a><br><a href=\"/wiki/Dynamic_programming\" title=\"Dynamic programming\">Dynamic programming</a><sup id=\"cite_ref-gs_2-0\" class=\"reference\"><a href=\"#cite_note-gs-2\">[2]</a></sup>\n</td>\n</tr>\n<tr>\n<th scope=\"row\">Institutions</th>\n<td><a href=\"/wiki/University_of_California,_Berkeley\" title=\"University of California, Berkeley\">University of California, Berkeley</a></td>\n</tr>\n<tr>\n<th scope=\"row\"><a href=\"/wiki/Thesis\" title=\"Thesis\">Thesis</a></th>\n<td>\n<a rel=\"nofollow\" class=\"external text\" href=\"https://www.worldcat.org/oclc/493477066\"><i>Some properties of Markoff chains</i></a> <span style=\"font-size:97%;\">(1941)</span>\n</td>\n</tr>\n<tr>\n<th scope=\"row\"><a href=\"/wiki/Doctoral_advisor\" title=\"Doctoral advisor\">Doctoral advisor</a></th>\n<td>\n<a href=\"/wiki/Joseph_Leo_Doob\" class=\"mw-redirect\" title=\"Joseph Leo Doob\">Joseph Leo Doob</a><sup id=\"cite_ref-mathgene_3-0\" class=\"reference\"><a href=\"#cite_note-mathgene-3\">[3]</a></sup>\n</td>\n</tr>\n<tr>\n<th scope=\"row\">Notable students</th>\n<td>\n<a href=\"/wiki/Roger_J-B_Wets\" title=\"Roger J-B Wets\">Roger J-B Wets</a><br>Richard S. Bucy<sup id=\"cite_ref-mathgene_3-1\" class=\"reference\"><a href=\"#cite_note-mathgene-3\">[3]</a></sup>\n</td>\n</tr>\n<tr style=\"display:none\"><td colspan=\"2\">\n</td></tr>\n</tbody></table>\n"
# Same node as above, parsed into a data frame instead of raw HTML.
html_table(
  html_node(
    read_html("https://en.wikipedia.org/wiki/David_Blackwell"),
    ".vcard"
  )
)
## David Blackwell
## 1 Blackwell in 1999
## 2 Born
## 3 Died
## 4 Nationality
## 5 Alma mater
## 6 Known for
## 7 Awards
## 8 Scientific career
## 9 Fields
## 10 Institutions
## 11 Thesis
## 12 Doctoral advisor
## 13 Notable students
## 14
## David Blackwell
## 1 Blackwell in 1999
## 2 David Harold Blackwell(1919-04-24)April 24, 1919Centralia, Illinois, U.S.
## 3 July 8, 2010(2010-07-08) (aged 91)[1]Berkeley, California, U.S.
## 4 American
## 5 University of Illinois at Urbana-Champaign (BA, PhD)
## 6 Rao–Blackwell theoremBlackwell channelArbitrarily varying channelGames of imperfect informationDirichlet distributionBayesian statisticsMathematical economicsRecursive economicsSequential analysis
## 7 Member of the National Academy of Sciences (1965)John von Neumann Theory Prize (1979)R. A. Fisher Lectureship (1986)
## 8 Scientific career
## 9 ProbabilityStatisticsLogicGame theoryDynamic programming[2]
## 10 University of California, Berkeley
## 11 Some properties of Markoff chains (1941)
## 12 Joseph Leo Doob[3]
## 13 Roger J-B WetsRichard S. Bucy[3]
## 14
# Parse the infobox table, name its two columns "key" and "value" and return
# it as a tibble.
infobox_node <- function() {
  html_node(
    read_html("https://en.wikipedia.org/wiki/David_Blackwell"),
    ".vcard"
  )
}
as_tibble(set_names(html_table(infobox_node()), c("key", "value")))
## # A tibble: 14 x 2
## key value
## <chr> <chr>
## 1 "Blackwell in 1~ "Blackwell in 1999"
## 2 "Born" "David Harold Blackwell(1919-04-24)April 24, 1919Centralia,~
## 3 "Died" "July 8, 2010(2010-07-08) (aged 91)[1]Berkeley, California,~
## 4 "Nationality" "American"
## 5 "Alma mater" "University of Illinois at Urbana-Champaign (BA, PhD)"
## 6 "Known for" "Rao–Blackwell theoremBlackwell channelArbitrarily varying ~
## 7 "Awards" "Member of the National Academy of Sciences (1965)John von ~
## 8 "Scientific car~ "Scientific career"
## 9 "Fields" "ProbabilityStatisticsLogicGame theoryDynamic programming[2~
## 10 "Institutions" "University of California, Berkeley"
## 11 "Thesis" "Some properties of Markoff chains (1941)"
## 12 "Doctoral advis~ "Joseph Leo Doob[3]"
## 13 "Notable studen~ "Roger J-B WetsRichard S. Bucy[3]"
## 14 "" ""
# First six rows of the science data.
head(science)
## # A tibble: 6 x 7
## name birth death occupation_s inventions_accomplish~ references links
## <chr> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 Amos, ~ 1918 2003 Microbiologi~ First African-America~ 6, https://e~
## 2 Alcorn~ 1940 NA Physicist; i~ Invented a method of ~ 7,8, https://e~
## 3 Andrew~ 1930 1998 Mathematician Put forth the Andrews~ 9, https://e~
## 4 Alexan~ 1888 1958 Civil engine~ Responsible for the c~ <NA> https://e~
## 5 Bailey~ 1825 1918 Inventor Folding bed 10, https://e~
## 6 Ball, ~ 1892 1916 Chemist Extracted chaulmoogra~ 11, https://e~
science_html adds a list-column named html, holding the parsed page for each link (NULL where the download fails)
# One row per (person, occupation) pair ...
science_html <- separate_rows(science, occupation_s, sep = ";")
# ... plus the parsed page behind each link. possibly() turns a failed
# download into NULL, and quiet = FALSE still prints the error message.
science_html <- mutate(
  science_html,
  html = map(links, possibly(read_html, otherwise = NULL, quiet = FALSE))
)
## Error: HTTP error 404.
## Error: HTTP error 404.
## Error: HTTP error 404.
Let’s pull the html column (which is a list in reality)
# First six elements of the html list-column.
head(pull(science_html, html))
## [[1]]
## {html_document}
## <html class="client-nojs" lang="en" dir="ltr">
## [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8 ...
## [2] <body class="mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject ...
##
## [[2]]
## {html_document}
## <html class="client-nojs" lang="en" dir="ltr">
## [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8 ...
## [2] <body class="mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject ...
##
## [[3]]
## {html_document}
## <html class="client-nojs" lang="en" dir="ltr">
## [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8 ...
## [2] <body class="mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject ...
##
## [[4]]
## {html_document}
## <html class="client-nojs" lang="en" dir="ltr">
## [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8 ...
## [2] <body class="mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject ...
##
## [[5]]
## {html_document}
## <html class="client-nojs" lang="en" dir="ltr">
## [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8 ...
## [2] <body class="mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject ...
##
## [[6]]
## {html_document}
## <html class="client-nojs" lang="en" dir="ltr">
## [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset=UTF-8 ...
## [2] <body class="mediawiki ltr sitedir-ltr mw-hide-empty-elt ns-0 ns-subject ...
Defining a helper function (a magrittr functional sequence) that extracts the infobox table from a parsed page
# Reusable helper built as a magrittr functional sequence (`. %>% ...`):
# given a parsed page, select the node matching the ".vcard" CSS class,
# convert that table to a data frame without treating the first row as a
# header, and turn the result into a tibble.
# NOTE(review): `as.tibble()` is deprecated as of tibble 2.0.0 in favour of
# `as_tibble()`; a warning to that effect is emitted when this helper runs.
extract_infobox <- . %>%
html_node(".vcard") %>%
html_table(header = FALSE) %>%
as.tibble()
# Printing the object lists the steps of the functional sequence.
extract_infobox
## Functional sequence with the following components:
##
## 1. html_node(., ".vcard")
## 2. html_table(., header = FALSE)
## 3. as.tibble(.)
##
## Use 'functions' to extract the individual functions.
Applying this function to science_html
# Compact column-wise overview of science_html.
glimpse(science_html)
## Rows: 164
## Columns: 8
## $ name <chr> "Amos, Harold", "Alcorn, George Edward, ...
## $ birth <dbl> 1918, 1940, 1940, 1930, 1888, 1825, 1892...
## $ death <dbl> 2003, NA, NA, 1998, 1958, 1918, 1916, 18...
## $ occupation_s <chr> "Microbiologist", "Physicist", " invento...
## $ inventions_accomplishments <chr> "First African-American department chair...
## $ references <chr> "6,", "7,8,", "7,8,", "9,", NA, "10,", "...
## $ links <chr> "https://en.wikipedia.org/wiki/Harold_Am...
## $ html <list> [<html class="client-nojs" lang="en" di...
# Add an `infobox` list-column: run extract_infobox on every parsed page and
# silently fall back to NULL wherever the extraction raises an error.
mutate(
  science_html,
  infobox = map(html, possibly(extract_infobox, otherwise = NULL, quiet = TRUE))
)
## Warning: `as.tibble()` is deprecated as of tibble 2.0.0.
## Please use `as_tibble()` instead.
## The signature and semantics have changed, see `?as_tibble`.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
## # A tibble: 164 x 9
## name birth death occupation_s inventions_acco~ references links html
## <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <lis>
## 1 Amos~ 1918 2003 "Microbiolo~ First African-A~ 6, http~ <xml~
## 2 Alco~ 1940 NA "Physicist" Invented a meth~ 7,8, http~ <xml~
## 3 Alco~ 1940 NA " inventor" Invented a meth~ 7,8, http~ <xml~
## 4 Andr~ 1930 1998 "Mathematic~ Put forth the A~ 9, http~ <xml~
## 5 Alex~ 1888 1958 "Civil engi~ Responsible for~ <NA> http~ <xml~
## 6 Bail~ 1825 1918 "Inventor" Folding bed 10, http~ <xml~
## 7 Ball~ 1892 1916 "Chemist" Extracted chaul~ 11, http~ <xml~
## 8 Bann~ 1731 1806 "Almanac au~ Constructed woo~ 12, http~ <xml~
## 9 Bann~ 1731 1806 " surveyor" Constructed woo~ 12, http~ <xml~
## 10 Bann~ 1731 1806 " farmer" Constructed woo~ 12, http~ <xml~
## # ... with 154 more rows, and 1 more variable: infobox <list>
# Same infobox extraction as above, inspected column-wise with glimpse().
glimpse(
  mutate(
    science_html,
    infobox = map(
      html,
      possibly(extract_infobox, otherwise = NULL, quiet = TRUE)
    )
  )
)
## Rows: 164
## Columns: 9
## $ name <chr> "Amos, Harold", "Alcorn, George Edward, ...
## $ birth <dbl> 1918, 1940, 1940, 1930, 1888, 1825, 1892...
## $ death <dbl> 2003, NA, NA, 1998, 1958, 1918, 1916, 18...
## $ occupation_s <chr> "Microbiologist", "Physicist", " invento...
## $ inventions_accomplishments <chr> "First African-American department chair...
## $ references <chr> "6,", "7,8,", "7,8,", "9,", NA, "10,", "...
## $ links <chr> "https://en.wikipedia.org/wiki/Harold_Am...
## $ html <list> [<html class="client-nojs" lang="en" di...
## $ infobox <list> [NULL, NULL, NULL, NULL, <tbl_df[13 x 2...
# Extract the infoboxes and keep only the rows where one was found, i.e.
# where the infobox element is not NULL.
science_html %>%
  mutate(
    infobox = map(
      html,
      possibly(extract_infobox, otherwise = NULL, quiet = TRUE)
    )
  ) %>%
  filter(map_lgl(infobox, ~ !is.null(.x)))
## # A tibble: 127 x 9
## name birth death occupation_s inventions_acco~ references links html
## <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <lis>
## 1 Alex~ 1888 1958 "Civil engi~ Responsible for~ <NA> http~ <xml~
## 2 Ball~ 1892 1916 "Chemist" Extracted chaul~ 11, http~ <xml~
## 3 Bann~ 1731 1806 "Almanac au~ Constructed woo~ 12, http~ <xml~
## 4 Bann~ 1731 1806 " surveyor" Constructed woo~ 12, http~ <xml~
## 5 Bann~ 1731 1806 " farmer" Constructed woo~ 12, http~ <xml~
## 6 Bany~ 1947 NA "Mathematic~ Work on diffeom~ 13, http~ <xml~
## 7 Bash~ 1957 NA "Inventor" First African-A~ 14, http~ <xml~
## 8 Bash~ 1957 NA " entrepren~ First African-A~ 14, http~ <xml~
## 9 Bash~ 1957 NA " professio~ First African-A~ 14, http~ <xml~
## 10 Bath~ 1942 2019 "Ophthalmol~ First African-A~ 15,16,17, http~ <xml~
## # ... with 117 more rows, and 1 more variable: infobox <list>